# ------------------------------------------------------------------------------#
# Description: Create
#   Figure 3: Spatial and temporal heterogeneity
#   Table 4: Robustness to removing heterogeneous basins
# Project:     Peer Effects in Voluntary Environmental Policies:
#              An Application to Urban Water Quality
# Author:      Daniel A. Brent | dab320@psu.edu
# Created:     2025-06-11
# ------------------------------------------------------------------------------#

# Clear environment ------------------------------------------------------------
rm(list = ls())
gc()

# Load required packages -------------------------------------------------------
library(pacman)
p_load(data.table, fixest, modelsummary, ggplot2)
options(scipen = 999)

# Load adoption data -----------------------------------------------------------
load("data/adoptions/adoptions.RData")

# Merge peer adoption buckets --------------------------------------------------
load("data/bucket/peer_adopt_buckets_any.RData")
dat <- merge(adopt, peer.adopt.bucket.any, by = c("pin", "year"))
rm(adopt, peer.adopt.bucket.any)

load("data/bucket/peer_adopt_buckets_rg.RData")
dat <- merge(dat, peer.adopt.bucket.rg, by = c("pin", "year"))
rm(peer.adopt.bucket.rg)

load("data/bucket/peer_adopt_buckets_any_rg.RData")
dat <- merge(dat, peer.adopt.bucket.any.rg, by = c("pin", "year"))
rm(peer.adopt.bucket.any.rg)

load("data/bucket/peer_adopt_buckets_cs.RData")
dat <- merge(dat, peer.adopt.bucket.cs, by = c("pin", "year"))
rm(peer.adopt.bucket.cs)

load("data/bucket/peer_adopt_buckets_both.RData")
dat <- merge(dat, peer.adopt.bucket.both, by = c("pin", "year"))
rm(peer.adopt.bucket.both)

# Merge peer eligibility buckets -----------------------------------------------
load("data/bucket/peer_elig_buckets_cs.RData")
dat <- merge(dat, peer.buckets.cs, by = c("pin", "year"))
rm(peer.buckets.cs)

load("data/bucket/peer_elig_buckets_rg.RData")
dat <- merge(dat, peer.buckets.rg, by = c("pin", "year"))
rm(peer.buckets.rg)

# Merge total peers ------------------------------------------------------------
load("data/bucket/total_peers.RData")
dat <- merge(dat, total.peers, by = "pin")
rm(total.peers)

# Merge parcel data ------------------------------------------------------------
load("data/parcel_build/parcel_build.RData")
dat <- merge(dat, parcel.build[, .(pin, zip5, sub.area, sqft, lot, beds, baths,
                                   yearbuilt, val.land, val.improve,
                                   ren.pre90, ren.90.99, ren.00.09, ren.10,
                                   water.prob)], by = "pin")
rm(parcel.build)

# Merge census data ------------------------------------------------------------
load("data/census/all_parcels_census_2010.RData")
parcels.2010[, blk := as.numeric(paste0(block, blkgrp))]
dat <- merge(dat, parcels.2010[, .(pin, tract, blkgrp, block, blk)])
rm(parcels.2010)

# Scale peer adoption variables ------------------------------------------------
dat[, peer.adopt.any.100.scale     := peer.adopt.any.100 / 100]
dat[, peer.adopt.any.rg.100.scale  := peer.adopt.any.rg.100 / 100]
dat[, peer.adopt.rg.100.scale      := peer.adopt.rg.100 / 100]
dat[, peer.adopt.cs.100.scale      := peer.adopt.cs.100 / 100]
dat[, peer.adopt.both.100.scale    := peer.adopt.both.100 / 100]

# Keep post-2010 only 
dat <- dat[year > 2010]

# Create average eligibility variable ------------------------------------------
dat[, elig.peers.avg.100 := elig.peers.cs.100 + elig.peers.rg.100]

# Figure 3a: Spatial Heterogeneity by Basin ------------------------------------
basins <- feols(adopt.any * 100 ~ peers.100 | year + blkgrp | 
                  peer.adopt.any.100 ~ elig.peers.avg.100,
                cluster = 'blkgrp', split = ~basin,
                data = dat[elig.cs == 1 & post.rainwise == 0])
etable(basins)

coef <- coefplot(basins, keep = 'peer.adopt.any.100',
                 dict = c(peer.adopt.any.100 = '$\\widehat{\\text{Peer Adoptions}}$'))

coef.plot <- as.data.table(coef$prms)
coef.plot[, basins := names(table(dat$basin))]
coef.plot[, estimate_names := NULL]

m1 <- feols(adopt.any * 100 ~ peers.100 | year + blkgrp | 
              peer.adopt.any.100 ~ elig.peers.avg.100,
            cluster = 'blkgrp',
            data = dat[elig.cs == 1 & post.rainwise == 0])
m1$coeftable[3]

gg <- ggplot(coef.plot, aes(x = as.factor(basins), y = y)) + 
  geom_point(size = 3, color = 'dodgerblue') +
  geom_errorbar(aes(ymin = ci_low, ymax = ci_high), linewidth = 1, width = 0.5,
                color = 'dodgerblue') + 
  geom_hline(yintercept = m1$coeftable[1], linetype = 'dashed', color = 'forestgreen') +
  geom_hline(yintercept = m1$coeftable[1] + m1$coeftable[3]*1.96, linetype = 3, color = 'forestgreen') +
  geom_hline(yintercept = m1$coeftable[1] - m1$coeftable[3]*1.96, linetype = 3, color = 'forestgreen') +
  xlab("Individual basins") + ylab("Effect on Adoption (percentage points)") + 
  scale_x_discrete(guide = guide_axis(angle = 45)) +
  scale_y_continuous(limits = c(-9, 8), breaks = seq(-9, 8, by = 1)) +
  theme_bw(base_size = 20)
print(gg)

ggsave("output/figures/figure_3a.png", gg, width = 150, height = 150,
       units = "mm", dpi = 150, scale = 2)

# Figure 3b: Spatial Heterogeneity by Year -------------------------------------
years <- feols(adopt.any * 100 ~ peers.100 | blkgrp | 
                 peer.adopt.any.100 ~ elig.peers.avg.100,
               cluster = 'blkgrp', split = ~year,
               data = dat[elig.cs == 1 & post.rainwise == 0 & year > 2010])
etable(years)

coef <- coefplot(years, keep = 'peer.adopt.any.100',
                 dict = c(peer.adopt.any.100 = '$\\widehat{\\text{Peer Adoptions}}$'))

coef.plot <- as.data.table(coef$prms)
coef.plot[, years := names(table(dat[year > 2010, year]))]

m1$coeftable[3]

gg <- ggplot(coef.plot, aes(x = as.factor(years), y = y)) + 
  geom_point(size = 3, color = 'dodgerblue') +
  geom_errorbar(aes(ymin = ci_low, ymax = ci_high), linewidth = 1, width = 0.5,
                color = 'dodgerblue') + 
  geom_hline(yintercept = m1$coeftable[1], linetype = 'dashed', color = 'forestgreen') +
  geom_hline(yintercept = m1$coeftable[1] + m1$coeftable[3]*1.96, linetype = 3, color = 'forestgreen') +
  geom_hline(yintercept = m1$coeftable[1] - m1$coeftable[3]*1.96, linetype = 3, color = 'forestgreen') +
  xlab("Individual years") + ylab("Effect on Adoption (percentage points)") + 
  scale_x_discrete(guide = guide_axis(angle = 45)) +
  theme_bw(base_size = 20)
print(gg)

ggsave("output/figures/figure_3b.png", gg, width = 150, height = 150,
       units = "mm", dpi = 150, scale = 2)

# Table 4: Robustness to Removing Heterogeneous Basins -------------------------
large <- c('E. Ballard/W. Phinney Ridge','Madison Park','Queen Anne','Fremont/Wallingford')
neg <- c('Portage Bay','Duwamish','Barton/Fauntleroy')
'%!in%' <- function(x, y)!('%in%'(x, y))

m2 <- update(m1, data = dat[elig.cs == 1 & post.rainwise == 0 & basin %!in% large])
m3 <- update(m1, data = dat[elig.cs == 1 & post.rainwise == 0 & basin %!in% neg])
m4 <- update(m1, data = dat[elig.cs == 1 & post.rainwise == 0 & basin %!in% c(large, neg)])

load('data/iv/iv_diag_basins.RData')
for (i in 1:4) {
  assign(paste0('eff.f.', i), round(iv_diag_basins[[i]]$F_stat[4], 2))
  assign(paste0('ar.ci.', i), paste0('[', round(iv_diag_basins[[i]]$AR$ci[1], 2), ', ',
                                     round(iv_diag_basins[[i]]$AR$ci[2], 2), ']'))
}

varnames <- c('year' = 'Year', 'blkgrp' = 'Block Group',
              'peer.adopt.any.100' = '$\\widehat{\\text{Peer Adoptions}}$')

etable(m1, m2, m3, m4, fixef_sizes = TRUE, depvar = FALSE, se.below = TRUE,
       dict = varnames, drop = c('peers.100'), fitstat = c('n'),
       extralines = list('_Sample' = c('Full', 'No Large', 'No Negative', 'No Large or Negative'),
                         '_F' = c(eff.f.1, eff.f.2, eff.f.3, eff.f.4),
                         '_AR CI' = c(ar.ci.1, ar.ci.2, ar.ci.3, ar.ci.4)))

etable(m1, m2, m3, m4, tex = TRUE, fixef_sizes = TRUE, depvar = FALSE, se.below = TRUE,
       dict = varnames, drop = c('peers.100'), 
       signif.code = c('*' = 0.1, '**' = 0.05, '***' = 0.01),
       extralines = list('_Sample' = c('Full', 'No Large', 'No Negative', 'No Large or Negative'),
                         '_F' = c(eff.f.1, eff.f.2, eff.f.3, eff.f.4),
                         '_AR CI' = c(ar.ci.1, ar.ci.2, ar.ci.3, ar.ci.4)),
       file = 'output/tables/table_4.tex', replace = TRUE)